library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
We are looking at data for Women’s AFL.
There are two tables: `players` (player-level statistics) and `teams` (club-level statistics).
The data was sourced from this website: https://github.com/ropenscilabs/ozwomensport/tree/master/AFLW/data
Read them into R:
# to clean up the names of the data
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# janitor::clean_names() standardises the column names, so columns can be
# referenced without backticks (e.g. `with spaces` becomes with_spaces).
players <- clean_names(read_csv("data/players.csv"))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## Player = col_character(),
## Club = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Read the team-level table and standardise its column names.
teams <- clean_names(read_csv("data/teams.csv"))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## Club = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
Have a look at the data with View() or glimpse().
# Compact overview of teams: each column with its type and first few values.
teams %>% glimpse()
## Rows: 16
## Columns: 52
## $ club <chr> "Adelaide Crows", "Brisbane Lions", "Carlton", "Coll…
## $ kicks_tot <dbl> 1052, 977, 780, 838, 818, 758, 911, 706, 906, 1077, …
## $ kicks_avg <dbl> 131.5, 122.1, 111.4, 119.7, 116.9, 108.3, 130.1, 100…
## $ handballs_tot <dbl> 458, 416, 424, 335, 409, 351, 575, 510, 312, 574, 36…
## $ handballs_avg <dbl> 57.3, 52.0, 60.6, 47.9, 58.4, 50.1, 82.1, 72.9, 44.6…
## $ disposals_tot <dbl> 1510, 1393, 1204, 1173, 1227, 1109, 1486, 1216, 1218…
## $ disposals_avg <dbl> 188.8, 174.1, 172.0, 167.6, 175.3, 158.4, 212.3, 173…
## $ cont_poss_tot <dbl> 866, 745, 673, 632, 650, 598, 728, 629, 684, 824, 62…
## $ cont_poss_avg <dbl> 108.3, 93.1, 96.1, 90.3, 92.9, 85.4, 104.0, 89.9, 97…
## $ uncont_poss_tot <dbl> 610, 637, 541, 550, 564, 480, 758, 597, 495, 789, 49…
## $ uncont_poss_avg <dbl> 76.3, 79.6, 77.3, 78.6, 80.6, 68.6, 108.3, 85.3, 70.…
## $ disp_eff_percent <dbl> 54.5, 56.1, 59.6, 56.6, 54.1, 54.4, 60.8, 58.5, 54.2…
## $ clangers_tot <dbl> 347, 319, 271, 278, 301, 245, 291, 256, 303, 298, 30…
## $ clangers_avg <dbl> 43.4, 39.9, 38.7, 39.7, 43.0, 35.0, 41.6, 36.6, 43.3…
## $ marks_tot <dbl> 248, 301, 229, 278, 232, 203, 266, 168, 208, 351, 19…
## $ marks_avg <dbl> 31.0, 37.6, 32.7, 39.7, 33.1, 29.0, 38.0, 24.0, 29.7…
## $ cont_marks_tot <dbl> 46, 57, 52, 44, 33, 21, 38, 24, 31, 53, 38, 46, 14, …
## $ cont_marks_avg <dbl> 5.8, 7.1, 7.4, 6.3, 4.7, 3.0, 5.4, 3.4, 4.4, 6.6, 5.…
## $ marks50_tot <dbl> 55, 47, 45, 42, 36, 31, 46, 31, 24, 59, 25, 37, 31, …
## $ marks50_avg <dbl> 6.9, 5.9, 6.4, 6.0, 5.1, 4.4, 6.6, 4.4, 3.4, 7.4, 3.…
## $ hit_outs_tot <dbl> 189, 173, 195, 256, 116, 200, 166, 167, 166, 206, 20…
## $ hit_outs_avg <dbl> 23.6, 21.6, 27.9, 36.6, 16.6, 28.6, 23.7, 23.9, 23.7…
## $ clearances_tot <dbl> 158, 135, 141, 152, 154, 155, 156, 167, 140, 155, 11…
## $ clearances_avg <dbl> 19.8, 16.9, 20.1, 21.7, 22.0, 22.1, 22.3, 23.9, 20.0…
## $ centre_clr_tot <dbl> 38, 30, 31, 34, 42, 40, 35, 38, 42, 48, 28, 37, 40, …
## $ centre_clr_avg <dbl> 4.8, 3.8, 4.4, 4.9, 6.0, 5.7, 5.0, 5.4, 6.0, 6.0, 4.…
## $ stoppages_tot <dbl> 120, 105, 110, 118, 112, 115, 121, 129, 98, 107, 90,…
## $ stoppages_avg <dbl> 15.0, 13.1, 15.7, 16.9, 16.0, 16.4, 17.3, 18.4, 14.0…
## $ rebound50_tot <dbl> 143, 180, 154, 131, 115, 157, 135, 133, 180, 150, 15…
## $ rebound50_avg <dbl> 17.9, 22.5, 22.0, 18.7, 16.4, 22.4, 19.3, 19.0, 25.7…
## $ frees_for_tot <dbl> 152, 135, 83, 109, 85, 104, 101, 107, 84, 113, 123, …
## $ frees_for_avg <dbl> 19.0, 16.9, 11.9, 15.6, 12.1, 14.9, 14.4, 15.3, 12.0…
## $ frees_agst_tot <dbl> 122, 129, 99, 95, 114, 95, 111, 111, 103, 103, 115, …
## $ frees_agst_avg <dbl> 15.3, 16.1, 14.1, 13.6, 16.3, 13.6, 15.9, 15.9, 14.7…
## $ tackles_tot <dbl> 445, 435, 346, 396, 418, 390, 386, 386, 413, 484, 43…
## $ tackles_avg <dbl> 55.6, 54.4, 49.4, 56.6, 59.7, 55.7, 55.1, 55.1, 59.0…
## $ one_percent_s_tot <dbl> 163, 166, 138, 151, 123, 128, 164, 166, 199, 177, 16…
## $ one_percent_s_avg <dbl> 20.4, 20.8, 19.7, 21.6, 17.6, 18.3, 23.4, 23.7, 28.4…
## $ bounces_tot <dbl> 13, 36, 15, 26, 29, 13, 18, 32, 9, 25, 10, 20, 29, 4…
## $ bounces_avg <dbl> 1.6, 4.5, 2.1, 3.7, 4.1, 1.9, 2.6, 4.6, 1.3, 3.1, 1.…
## $ inside50_tot <dbl> 263, 194, 164, 164, 183, 126, 196, 173, 179, 221, 17…
## $ inside50_avg <dbl> 32.9, 24.3, 23.4, 23.4, 26.1, 18.0, 28.0, 24.7, 25.6…
## $ goals_tot <dbl> 45, 35, 39, 32, 25, 23, 37, 31, 33, 38, 21, 42, 34, …
## $ goals_avg <dbl> 5.6, 4.4, 5.6, 4.6, 3.6, 3.3, 5.3, 4.4, 4.7, 4.8, 3.…
## $ behinds_tot <dbl> 38, 26, 19, 29, 34, 15, 31, 42, 32, 41, 47, 29, 26, …
## $ behinds_avg <dbl> 4.8, 3.3, 2.7, 4.1, 4.9, 2.1, 4.4, 6.0, 32.0, 41.0, …
## $ goal_assists_tot <dbl> 19, 19, 23, 18, 15, 15, 20, 17, 14, 18, 9, 21, 16, 1…
## $ goal_assists_avg <dbl> 2.4, 2.4, 3.3, 2.6, 2.1, 2.1, 2.9, 2.4, 2.0, 2.3, 1.…
## $ goal_acc_percent <dbl> 36.0, 40.2, 50.6, 45.7, 34.2, 43.4, 37.4, 34.4, 42.3…
## $ interchanges_tot <dbl> 0, 57, 8, 30, 33, 54, 37, 30, 0, 0, 0, 0, 0, 0, 0, 0
## $ interchanges_avg <dbl> 0, 57, 8, 30, 33, 54, 37, 30, 0, 0, 0, 0, 0, 0, 0, 0
## $ year <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2018…
# Compact overview of players: each column with its type and first few values.
players %>% glimpse()
## Rows: 469
## Columns: 45
## $ player <chr> "Aasta O'Connor", "Abbey Holmes", "Aimee Schmid…
## $ club <chr> "WB", "ADEL", "GWS", "MELB", "FRE", "GWS", "BL"…
## $ kicks_tot <dbl> 9, 35, 21, 21, 29, 47, 29, 7, 63, 4, 15, 15, 33…
## $ kicks_avg <dbl> 2.3, 4.4, 3.0, 5.3, 4.8, 6.7, 3.6, 1.8, 9.0, 1.…
## $ handballs_tot <dbl> 14, 38, 17, 9, 8, 20, 29, 5, 16, 9, 7, 11, 17, …
## $ handballs_avg <dbl> 3.5, 4.8, 2.4, 2.3, 1.3, 2.9, 3.6, 1.3, 2.3, 3.…
## $ disposals_tot <dbl> 23, 73, 38, 30, 37, 67, 58, 12, 79, 13, 22, 26,…
## $ disposals_avg <dbl> 5.8, 9.1, 5.4, 7.5, 6.2, 9.6, 7.3, 3.0, 11.3, 4…
## $ cont_poss_tot <dbl> 12, 51, 13, 18, 20, 35, 33, 7, 35, 11, 7, 8, 30…
## $ cont_poss_avg <dbl> 3.0, 6.4, 1.9, 4.5, 3.3, 5.0, 4.1, 1.8, 5.0, 3.…
## $ uncont_poss_tot <dbl> 12, 27, 23, 12, 16, 23, 28, 5, 42, 2, 13, 18, 2…
## $ uncont_poss_avg <dbl> 3.0, 3.4, 3.3, 3.0, 2.7, 3.3, 3.5, 1.3, 6.0, 0.…
## $ disp_eff_percent <dbl> 65.2, 52.1, 55.3, 50.0, 48.6, 59.7, 56.9, 33.3,…
## $ clangers_tot <dbl> 8, 17, 8, 6, 8, 10, 10, 9, 22, 5, 5, 2, 13, 14,…
## $ clangers_avg <dbl> 2.0, 2.1, 1.1, 1.5, 1.3, 1.4, 1.3, 2.3, 3.1, 1.…
## $ marks_tot <dbl> 4, 9, 15, 8, 2, 6, 6, 2, 16, 1, 6, 6, 15, 10, 1…
## $ marks_avg <dbl> 1.0, 1.1, 2.1, 2.0, 0.3, 0.9, 0.8, 0.5, 2.3, 0.…
## $ cont_marks_tot <dbl> 0, 4, 1, 5, 1, 0, 0, 0, 0, 1, 0, 0, 6, 5, 0, 1,…
## $ cont_marks_avg <dbl> 0.0, 0.5, 0.1, 1.3, 0.2, 0.0, 0.0, 0.0, 0.0, 0.…
## $ marks50_tot <dbl> 2, 2, 3, 3, 0, 0, 0, 0, 1, 1, 2, 1, 0, 7, 0, 0,…
## $ marks50_avg <dbl> 0.5, 0.3, 0.4, 0.8, 0.0, 0.0, 0.0, 0.0, 0.1, 0.…
## $ hit_outs_tot <dbl> 24, 0, 0, 0, 6, 0, 1, 0, 0, 47, 0, 0, 89, 2, 0,…
## $ hit_outs_avg <dbl> 6.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.1, 0.0, 0.0, 15…
## $ clearances_tot <dbl> 0, 5, 0, 3, 5, 4, 7, 0, 11, 2, 0, 1, 10, 1, 5, …
## $ clearances_avg <dbl> 0.0, 0.6, 0.0, 0.8, 0.8, 0.6, 0.9, 0.0, 1.6, 0.…
## $ frees_for_tot <dbl> 1, 8, 1, 1, 0, 8, 4, 1, 7, 2, 1, 2, 7, 4, 5, 9,…
## $ frees_for_avg <dbl> 0.3, 1.0, 0.1, 0.3, 0.0, 1.1, 0.5, 0.3, 1.0, 0.…
## $ frees_agst_tot <dbl> 3, 2, 3, 2, 2, 1, 5, 5, 5, 1, 2, 0, 6, 6, 5, 5,…
## $ frees_agst_avg <dbl> 0.8, 0.3, 0.4, 0.5, 0.3, 0.1, 0.6, 1.3, 0.7, 0.…
## $ tackles_tot <dbl> 6, 16, 9, 8, 13, 21, 27, 4, 32, 16, 4, 10, 16, …
## $ tackles_avg <dbl> 1.5, 2.0, 1.3, 2.0, 2.2, 3.0, 3.4, 1.0, 4.6, 5.…
## $ one_percent_s_tot <dbl> 6, 5, 5, 0, 11, 14, 3, 3, 7, 4, 0, 0, 15, 2, 15…
## $ one_percent_s_avg <dbl> 1.5, 0.6, 0.7, 0.0, 1.8, 2.0, 0.4, 0.8, 1.0, 1.…
## $ bounces_tot <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 2, 0, 4, 1, 0, 0, 0, 2,…
## $ bounces_avg <dbl> 0.0, 0.0, 0.0, 0.0, 0.2, 0.1, 0.0, 0.0, 0.3, 0.…
## $ goals_tot <dbl> 1, 2, 3, 0, 0, 0, 0, 0, 3, 0, 2, 0, 2, 9, 0, 0,…
## $ goals_avg <dbl> 0.3, 0.3, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.…
## $ behinds_tot <dbl> 0, 2, 0, 2, 0, 1, 1, 0, 1, 0, 2, 1, 0, 3, 0, 1,…
## $ behinds_avg <dbl> 0.0, 0.3, 0.0, 0.5, 0.0, 0.1, 0.1, 0.0, 0.1, 0.…
## $ goal_assists_tot <dbl> 1, 2, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,…
## $ goal_assists_avg <dbl> 0.3, 0.3, 0.0, 0.3, 0.0, 0.1, 0.0, 0.0, 0.0, 0.…
## $ goal_acc_percent <dbl> 100.0, 40.0, 50.0, 0.0, 0.0, 0.0, 0.0, 0.0, 75.…
## $ matches <dbl> 4, 8, 7, 4, 6, 7, 8, 4, 7, 3, 4, 7, 7, 7, 7, 7,…
## $ time_on_ground_percent <dbl> 73.6, 64.5, 82.4, 63.7, 64.9, 88.6, 57.5, 56.2,…
## $ year <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,…
Using the teams dataset, make a barchart of the total kicks by club. What do you learn?
# geom_col() stacks rows that share an x value, so each bar is the sum of
# kicks_tot over all rows belonging to that club.
teams %>%
  ggplot(aes(x = club, y = kicks_tot)) +
  geom_col()
Take that same barplot and reorder the club by kicks_tot, what changes?
# Order clubs by their summed kicks so the ordering matches the stacked bar
# lengths. reorder() summarises with the mean by default, which only agrees
# with the bars when every club contributes the same number of rows.
# coord_flip() draws horizontal bars so the club names stay readable, and
# labs() replaces the raw expression that would otherwise label the axis.
ggplot(
  teams,
  aes(
    x = fct_reorder(club, kicks_tot, .fun = sum),
    y = kicks_tot
  )
) +
  geom_col() +
  coord_flip() +
  labs(x = "club", y = "kicks_tot")
To compare teams, should you use average or total numbers? Explain your thinking.
Answer: Averages are the better basis for comparison, because not all teams may have played the same number of games.
Make a scatterplot of average kicks by average handballs. Use an aspect ratio of 1. What do you learn?
# Average kicks against average handballs, one point per row of teams.
# aspect.ratio = 1 makes the plotting panel square.
teams %>%
  ggplot(aes(x = kicks_avg, y = handballs_avg)) +
  geom_point() +
  theme(aspect.ratio = 1)
The static plot on its own does not identify individual teams, so little can be said from it.
Making it interactive with `ggplotly()` can provide more information on individual observations.
Use the plotly package to make the previous plot interactive (with the ggplotly function). Report the name of the team(s) that has(ve) the highest on both variables.
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Keep the plot in a variable so it can be handed to ggplotly() explicitly,
# instead of relying on whichever ggplot happened to be created last.
# The `label` aesthetic is not drawn by geom_point(); it is carried along so
# that ggplotly() can show the club in the hover tooltip.
kicks_vs_handballs <- ggplot(
  teams,
  aes(x = kicks_avg, y = handballs_avg, label = club)
) +
  geom_point() +
  theme(aspect.ratio = 1)

# Static version.
kicks_vs_handballs

# Interactive version of the same plot. ggplotly() warns that it does not
# implement the aspect.ratio theme setting.
ggplotly(kicks_vs_handballs)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
Collingwood has the highest `kicks_avg`.
Melbourne has the highest `handballs_avg`.
Make a scatterplot of average frees for (frees_for_avg) by against (frees_agst_avg), for each season. Use an aspect ratio of 1. What do you learn?
# Average frees for against average frees against, one panel per season.
# The task asks for the comparison "for each season"; without facet_wrap()
# all values of `year` are overplotted in a single panel.
ggplot(
  teams,
  aes(x = frees_for_avg, y = frees_agst_avg)
) +
  geom_point() +
  facet_wrap(~year) +
  theme(aspect.ratio = 1)
The static plot on its own does not identify individual teams, so little can be said from it.
Making it interactive with `ggplotly()` can provide more information on individual observations.
Make the plot interactive and report the name of the team that has the most and the least frees_agst_avg and frees_for_avg.
# use the similar ggplotly code from before
ggplot(teams,
aes(frees_for_avg,
frees_agst_avg,
text = club)) +
geom_point() +
theme(aspect.ratio = 1)
ggplotly()
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
Frees against, average (`frees_agst_avg`): Collingwood has the most; Brisbane has the least.
Frees for, average (`frees_for_avg`): Adelaide Crows has the most; Carlton has the least.
Using the players data, make a side-by-side boxplot of the average kicks by each club. What do you learn?
# Distribution of players' average kicks, one box per club.
players %>%
  ggplot(aes(x = club, y = kicks_avg)) +
  geom_boxplot()
Create a ggplotly() to find out which team had the player with the highest average kicks. Do some teams appear to have “star” players, and others have more even skills in this area?
Hint: this should show points for the average kicks of each player by club, where the ggplotly text shows the player name.
# One point per row of players, positioned by club and average kicks.
# The `text` aesthetic is not drawn by geom_point(); ggplotly() uses it to
# show the player name in the hover tooltip.
# The plot is stored and passed to ggplotly() explicitly rather than relying
# on whichever ggplot happened to be created last.
player_kicks <- ggplot(
  players,
  aes(x = club, y = kicks_avg, text = player)
) +
  geom_point() +
  theme(aspect.ratio = 1)

# Static version.
player_kicks

# Interactive version of the same plot. ggplotly() warns that it does not
# implement the aspect.ratio theme setting.
ggplotly(player_kicks)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
Erin Phillips is the star player of ADEL.
Emma Kearney is the star player of WB, with an average close to that of Erin Phillips.